"""Datawarehouse failure triager."""
import argparse
import copy
from http import HTTPStatus
import os
import pathlib
import typing

from cki_lib import messagequeue
from cki_lib import metrics
from cki_lib import misc
from cki_lib.kcidb.file import KCIDBFile
from cki_lib.logger import get_logger
import requests.exceptions

from . import checkers
from . import dwobject

# Logger shared by everything in this module.
LOGGER = get_logger('cki.triager')
# Evaluated once at import time. Triager only writes to DataWarehouse (issue links and the
# "triaged" marker) when this is truthy; otherwise it just logs what it would have done.
IS_PRODUCTION_OR_STAGING = misc.is_production_or_staging()

# callback() only triages a message when its object_type is in this set ...
OBJECT_TYPE_TRIAGED = {'checkout', 'build', 'test'}
# ... and its status is in this set; any other message is logged and skipped.
STATUS_TRIAGED = {'new', 'updated', 'needs_triage'}


class Triager:
    """Match a KCIDB object against issue regexes and report the outcome."""

    def __init__(self) -> None:
        """Start with an empty cache of issues already linked to objects."""
        # Maps (obj.type, obj.id) to the ids of the issues known to be linked to that object.
        self.issue_cache: dict[tuple[str, str], set[int]] = {}

    def needs_linking(self, match: checkers.RegexMatch) -> typing.Optional[tuple[str, str]]:
        """Return the cache key if the matched issue still has to be linked, else None.

        The issues of an object are listed at most once per Triager instance; later
        lookups for the same (type, id) pair are answered from ``issue_cache``.
        """
        dw_obj = match.log_file.dw_obj
        cache_key = (dw_obj.type, dw_obj.id)
        if cache_key not in self.issue_cache:
            self.issue_cache[cache_key] = {issue.id for issue in dw_obj.issues.list()}
        if match.regex.issue_id in self.issue_cache[cache_key]:
            return None
        return cache_key

    def report_issues(self, matches: list[checkers.RegexMatch]) -> None:
        """Link each matched issue to its object unless it is linked already.

        Outside production/staging nothing is created; the intended link is only
        logged. In both cases the issue is remembered in ``issue_cache`` so that a
        repeated match for the same object is skipped.
        """
        for match in matches:
            cache_key = self.needs_linking(match)
            dw_obj = match.log_file.dw_obj
            issue_id = match.regex.issue_id
            if not cache_key:
                LOGGER.debug('Already linked: obj=%s to issue=%s', dw_obj, issue_id)
                continue
            if not IS_PRODUCTION_OR_STAGING:
                LOGGER.info('Would link obj=%s to issue=%s in prod', dw_obj, issue_id)
            else:
                LOGGER.info('Linking obj=%s to issue=%s', dw_obj, issue_id)
                # Only the issue id is submitted; the matching log file and regex id
                # are not reported because DW currently has no support for that.
                dw_obj.issues.create(issue_id=issue_id)
            self.issue_cache[cache_key].add(issue_id)

    @staticmethod
    def _mark_triaged(dw_obj: typing.Any, issueregex_ids: list[int],
                      triage_result: typing.Any) -> None:
        """Tag the object as triaged unless one of the skip conditions applies."""
        if triage_result.status == checkers.TriageStatus.INCOMPLETE:
            LOGGER.info('Not marking as triaged because incomplete obj=%s', dw_obj)
            return
        if issueregex_ids and not triage_result.matches:
            LOGGER.info('Not marking as triaged as individual regex without matches obj=%s',
                        dw_obj)
            return
        if not IS_PRODUCTION_OR_STAGING:
            LOGGER.info('Would mark obj=%s as triaged in prod', dw_obj)
            return
        try:
            dw_obj.action_triaged.create()
        except requests.exceptions.HTTPError as exception:
            # A 404 is tolerated because the object might have been deleted in the
            # meanwhile; every other HTTP error is propagated.
            if exception.response.status_code != HTTPStatus.NOT_FOUND:
                raise
            LOGGER.error("Unable to find %r, assuming it was deleted", dw_obj)

    def check(
        self, dw_obj: typing.Any, issueregex_ids: list[int], *, to_dw: bool = True
    ) -> list[dict]:
        """Triage one object and return the issue occurrences of its full matches.

        Args:
            dw_obj: object to triage; its ``type`` and ``id`` are logged.
            issueregex_ids: passed through to ``checkers.triage``.
            to_dw: when false, nothing is reported and the object is not marked
                as triaged; only the issue occurrences are returned.

        Returns:
            One issue occurrence per match whose status is FULL_MATCH.
        """
        LOGGER.debug('Checking type=%s id=%s', dw_obj.type, dw_obj.id)
        triage_result = checkers.triage(dw_obj, issueregex_ids)
        full_matches = [
            candidate
            for candidate in triage_result.matches
            if candidate.status == checkers.MatchStatus.FULL_MATCH
        ]
        issueoccurrences = [
            checkers.get_issueoccurrence_from_match(found) for found in full_matches
        ]
        if to_dw:
            # Reporting to the DWH: link the issues first, then tag the object.
            self.report_issues(full_matches)
            self._mark_triaged(dw_obj, issueregex_ids, triage_result)
        return issueoccurrences


def triage_from_file(
    kcidb_file: KCIDBFile, issueregex_ids: list[int], *, to_dw: bool, **kwargs
) -> list[dict]:
    """
    Run Triager().check on all objects in the given file, optionally filtered.

    Args:
        kcidb_file: KCIDBFile object from cki_lib.kcidb.file.
        issueregex_ids: a list of issueregex_ids.
        to_dw: whether issue occurrences should be submitted to DataWarehouse
        kwargs: optional arguments to filter data

    Returns:
        A list of issueoccurrences (each issueoccurrence is a dict).
    """
    # Deep copy is necessary to avoid modification of kcidb_file when adding related_build and
    # related_checkout to tests.misc. This is because misc.related_build and misc.related_checkout
    # can't be serialized by json.dump.
    all_data = copy.deepcopy(dwobject.filter_data(kcidb_file.data, **kwargs))
    _checkouts = {}
    _builds = {}
    issueoccurrences = []
    for obj_type in ['checkout', 'build', 'test']:
        for kcidb_obj in all_data.get(f"{obj_type}s", []):
            dw_obj = dwobject.from_attrs(obj_type, attrs=kcidb_obj)
            match obj_type:
                case "checkout":
                    _checkouts[dw_obj.id] = dw_obj
                case "build":
                    _builds[dw_obj.id] = dw_obj
                    related_checkout = _checkouts.get(dw_obj.checkout_id)
                    dw_obj.misc["related_checkout"] = related_checkout
                case _:
                    # Attach related_build and related_checkout to dw_obj (only for KCIDBTest)
                    related_build = _builds.get(dw_obj.build_id)
                    dw_obj.misc["related_build"] = related_build
                    dw_obj.misc["related_checkout"] = related_build.misc["related_checkout"]
            issueoccurrences += Triager().check(dw_obj, issueregex_ids, to_dw=to_dw)
    return issueoccurrences


def callback(body: typing.Any = None, **_: typing.Any) -> None:
    """Triage the object carried by one queue message, if it qualifies.

    The message must provide ``object``, ``object_type`` and ``status``; ``misc`` is
    optional and may carry ``issueregex_ids``. Every message is logged, but only
    those whose status is in STATUS_TRIAGED and whose object type is in
    OBJECT_TYPE_TRIAGED are handed to a fresh Triager.
    """
    obj_data, obj_type, status = body['object'], body['object_type'], body['status']
    msg_misc = body.get('misc') or {}
    issueregex_ids = msg_misc.get('issueregex_ids') or []
    LOGGER.info('Got message for (%s) %s id=%s misc=%s',
                status, obj_type, obj_data['id'], msg_misc)
    if status not in STATUS_TRIAGED or obj_type not in OBJECT_TYPE_TRIAGED:
        return
    Triager().check(dwobject.from_attrs(obj_type, attrs=obj_data), issueregex_ids)


def main(args: list[str] | None = None) -> None:
    """CLI Interface."""
    parser = argparse.ArgumentParser()

    # Action
    action_group = parser.add_mutually_exclusive_group(required=False)
    action_group.add_argument(
        "--listen",
        action="store_true",
        help=(
            "Start polling the message queue for messages to triage."
            " Defaults to this if no other option was selected."
        ),
    )
    action_group.add_argument(
        "--from-dw",
        action="store_true",
        help="Fetch KCIDB data from DataWarehouse. Defaults to this if given 'KCIDB object filter'",
    )
    action_group.add_argument(
        "--from-file",
        help="File to use (kcidb_all.json)",
        type=pathlib.Path,
    )

    # KCIDB object filter
    obj_filter_group = parser.add_argument_group(
        "KCIDB object filter",
        "Triage only the nested children and parents of the given object.",
    ).add_mutually_exclusive_group(required=False)
    obj_filter_group.add_argument(
        "--checkout-id",
        type=str,
        help="Triage the Checkout matching this ID, all its builds, tests and results.",
    )
    obj_filter_group.add_argument(
        "--build-id",
        type=str,
        help="Triage the Build matching this ID, its checkout, and all its tests and results.",
    )
    obj_filter_group.add_argument(
        "--test-id",
        type=str,
        help="Triage the Test matching this ID, its checkout and build, and all its results.",
    )

    parser.add_argument('--regex-id', help='Only check specified issue regex')
    parser.add_argument(
        "--to-file",
        help="Write the result to file, instead of stdout. Ignored unless --from-file is set.",
        default="/dev/stdout",
        type=pathlib.Path,
    )
    parser.add_argument(
        "--to-dw", help="Try to submit triaged issues to the Datawarehouse.", action="store_true"
    )
    arguments = parser.parse_args(args)

    issueregex_ids = [arguments.regex_id] if arguments.regex_id else []

    # Get value from --checkout-id, --build-id, --test-id, if defined
    kcidb_filter = next(
        (
            {"obj_type": kcidb_type, "obj_id": kcidb_id}
            for kcidb_type in ["checkout", "build", "test"]
            if (kcidb_id := getattr(arguments, f"{kcidb_type}_id"))
        ),
        None,
    )

    if from_file := arguments.from_file:
        if not from_file.exists():
            raise FileNotFoundError(f"Input file {from_file} not found!")
        kcidb_file = KCIDBFile(from_file)
        issueoccurrences = triage_from_file(
            kcidb_file=kcidb_file,
            issueregex_ids=issueregex_ids,
            to_dw=arguments.to_dw,
            **({f"{kcidb_filter["obj_type"]}_id": kcidb_filter["obj_id"]} if kcidb_filter else {}),
            # checkout_id=arguments.checkout_id,
            # build_id=arguments.build_id,
            # test_id=arguments.test_id,
        )
        if to_file := arguments.to_file:
            kcidb_file.data['issueoccurrences'] = issueoccurrences
            # NOTE: Disable the validation of KCIDBFile, as it doesn't support issueoccurrences.
            # Once the KCIDBFile supports issueoccurrences, the validation can be enabled back.
            kcidb_file.validate = False
            if from_file != to_file:
                kcidb_file.file_path = to_file
            kcidb_file.save()
        return

    if arguments.from_dw and not kcidb_filter:
        raise ValueError("Selected --from-dw without a KCIDB object to filter.")

    # Fetch data from DW to triage
    if arguments.from_dw or kcidb_filter:
        dw_obj = dwobject.from_obj_id(**kcidb_filter)
        Triager().check(dw_obj, issueregex_ids, to_dw=arguments.to_dw)
        return

    # Default behavior: triage data from message queue
    metrics.prometheus_init()
    LOGGER.info("Running checks on queue items.")
    messagequeue.MessageQueue().consume_messages(
        os.environ.get('WEBHOOK_RECEIVER_EXCHANGE', 'cki.exchange.webhooks'),
        os.environ['DATAWAREHOUSE_TRIAGER_ROUTING_KEYS'].split(),
        callback,
        queue_name=os.environ.get('DATAWAREHOUSE_TRIAGER_QUEUE'),
        max_priority=1)
